library(tidyverse)
library(here)
#library(ggtext)
#library(spatstat.core)
#library(patchwork)
theme_set(theme_light(base_size = 11.5, base_family = "Open Sans"))
theme_update(
panel.grid.major = element_line(size = .3, color = "grey93"),
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank(),
strip.background = element_rect(fill = "grey60", colour = "grey60"),
strip.text = element_text(size = 14, face = "bold"),
axis.title.x = element_text(size = 14, margin = margin(t = 12)),
axis.title.y = element_text(size = 14, margin = margin(r = 12)),
axis.text = element_text(size = 12),
legend.title = element_text(size = 14, face = "bold"),
legend.text = element_text(size = 12),
plot.title = element_text(size = 24, face = "bold", margin = margin(5, 0, 5, 0)),
plot.subtitle = element_text(margin = margin(5, 0, 25, 0), size = 17),
plot.title.position = "plot",
plot.margin = margin(rep(12, 4))
)
## source functions to generate and visualize distributions
source(here("R", "src", "generate-distributions.R"))
Is the predictive power of the current acoustic monitoring sufficient enough to estimate the true number of killed bats?
The predictive power of the acoustic monitoring for the extrapolation of the expected number of stroke victims is decreasing
in case the crossing bats are not distributed uniformly or randomly.
with increasing length of the rotor blades.
for species with high-frequency echo calls.
We have the following parameters we can vary:
radius_rotor) - 60 m versus 33 m
area_rotor as pi * radius_rotor^2)proportion_covered)
area_monitored as area_rotor * proportion_covered)radius_monitored as sqrt(area_monitored / pi))n)
rds <- here("output", "data-proc", "simulation-runs-corr-trial.rds")
if (!file.exists(rds)) {
scenarios <- simulate_multiple_distributions(
runs = 200, ## repetitions per combination, also used for seeding
prop_monitored = seq(.05, .5, by = .05),
#n = c(100L, 250L, 500L, 1000L, 5000L, 15000L),
n = seq(100L, 500L, by = 100L),
skewness = c(1, 3, 5)
)
write_rds(scenarios, rds)
} else {
scenarios <- read_rds(rds)
}
scenarios_cleaned <-
scenarios %>%
## remove extremely skewed distributions
filter(!str_detect(distribution, "_5$")) %>%
group_by(n, prop_monitored, distribution) %>%
mutate(diff = prop_monitored - median(prop_n_monitored)) %>%
ungroup() %>%
mutate(
distribution = factor(
distribution,
levels = c(
"uniform", "random", "inner", "outer",
"bottom_1", "bottom_3", "top_1", "top_3"
)
),
prop_monitored_lab = paste0(prop_monitored * 100, "%"),
prop_monitored_lab = fct_reorder(prop_monitored_lab, prop_monitored)
)
Distribution (col) x Monitored Area (color) x Passes (x)
scenarios_cleaned %>%
group_by(distribution, prop_monitored, n) %>%
summarize(mean = mean(prop_n_monitored), sd = sd(prop_n_monitored)) %>%
ungroup() %>%
mutate(
base = mean - prop_monitored,
min = base - sd,
max = base + sd
) %>%
ggplot(aes(n, base, color = prop_monitored, group = prop_monitored)) +
geom_hline(aes(yintercept = 0), color = "grey75", size = 1.2) +
geom_line(
aes(color = prop_monitored,
color = after_scale(colorspace::desaturate(colorspace::lighten(color, .6), .4))),
size = .7, show.legend = FALSE
) +
geom_pointrange(aes(ymin = min, ymax = max), size = .3) +
facet_wrap(vars(distribution), nrow = 1) +
scale_x_discrete(expand = c(.06, .06)) +
scale_y_continuous(expand = c(.012, .012), breaks = -5:5 / 10) +
scico::scale_color_scico(
palette = "bamako", end = .8, direction = -1, name = "Proportion\ncovered\nby AUD:",
breaks = seq(.05, .5, by = .05), labels = scales::percent_format(accuracy = 1)
) +
guides(color = guide_legend(keywidth = unit(.6, "lines"), keyheight = unit(1.2, "lines"))) +
labs(x = "Number of bat passes", y = "Deviation from expected proportion") +
theme(panel.spacing.x = unit(.5, "lines"), axis.text = element_text(size = 10),
legend.text = element_text(hjust = 1))
Distribution (col) x Monitored Area (rows) x Passes (x)
box <-
ggplot(scenarios_cleaned, aes(factor(n), prop_n_monitored)) +
geom_hline(aes(yintercept = prop_monitored), color = "grey75", size = .7) +
geom_boxplot(aes(color = diff), size = .4, outlier.shape = 1, outlier.size = .3) +
scale_y_continuous(labels = scales::percent_format(),
sec.axis = dup_axis(name = "Area covered by AUD", breaks = NULL, labels = NULL)) +
scale_color_gradient2(low = "firebrick", mid = "grey40", high = "firebrick",
limits = c(-.5, .5), guide = "none") +
labs(x = "Number of bat passes", y = "Proportion of passes monitored") +
theme(panel.spacing.y = unit(.65, "lines"), axis.text = element_text(size = 10),
axis.title.y.right = element_text(size = 16, color = "grey60", face = "bold", margin = margin(l = 12)))
## fixed y scale
box + facet_grid(prop_monitored_lab ~ distribution)
ggsave(here("plots", "bat_passes", "passes_recorded_boxplots_fixed.pdf"), width = 13.5, height = 11, device = cairo_pdf)
## free y scale
box + facet_grid(prop_monitored_lab ~ distribution, scale = "free_y")
colors <- c("dodgerblue", "firebrick")
scenarios_cleaned %>%
mutate(diff_passes = (n_monitored / prop_monitored) - n) %>%
bind_rows(
tibble(
distribution = factor("uniform", levels = levels(scenarios_cleaned$distribution)),
label = c("Overestimation", "Underestimation"),
prop_monitored_lab = factor("5%", levels = levels(scenarios_cleaned$prop_monitored_lab)),
diff_passes = c(125, -125),
color = colors
)
) %>%
ggplot(aes(prop_monitored_lab, diff_passes)) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = 0, ymax = Inf, fill = colors[1], alpha = .05) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0, fill = colors[2], alpha = .05) +
geom_text(aes(label = label, color = color), hjust = 0, fontface = "bold", size = 6) +
geom_hline(yintercept = 0, size = .6, linetype = "31", color = "grey75") +
geom_boxplot(color = "grey45", size = .3, outlier.size = .6, outlier.alpha = .4, outlier.shape = 20) +
stat_summary(geom = "point", shape = 18, size = 3) +
facet_wrap(~distribution, nrow = 2) +
scale_y_continuous(expand = c(.02, .02), breaks = seq(-500, 1000, by = 250)) +
scale_color_identity() +
labs(x = "Area covered by AUD",
y = "Absolute difference of predicted versus expected bat passes") +#,
#title = "How much do the actual numbers differ from predicted bat passes?",
#subtitle = "Absolute difference of actual bat passes and monitored bat passes") +
theme(axis.text.x = element_text(size = 8))
ggsave(here("plots", "bat_passes", "passes_difference_boxplots_abs.pdf"), width = 12, height = 9, device = cairo_pdf)
scenarios_cleaned %>%
mutate(diff_passes = ((n_monitored / prop_monitored) - n) / n) %>%
bind_rows(
tibble(
distribution = factor("uniform", levels = levels(scenarios_cleaned$distribution)),
label = c("Overestimation", "Underestimation"),
prop_monitored_lab = factor("5%", levels = levels(scenarios_cleaned$prop_monitored_lab)),
diff_passes = c(.75, -.75),
color = colors
)
) %>%
ggplot(aes(prop_monitored_lab, diff_passes)) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = 0, ymax = Inf, fill = colors[1], alpha = .05) +
annotate(geom = "rect", xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = 0, fill = colors[2], alpha = .05) +
geom_text(aes(label = label, color = color), hjust = 0, fontface = "bold", size = 6) +
geom_hline(yintercept = 0, size = .6, linetype = "31", color = "grey75") +
geom_boxplot(color = "grey45", size = .3, outlier.size = .6, outlier.alpha = .4, outlier.shape = 20) +
stat_summary(geom = "point", shape = 18, size = 3) +
facet_wrap(~distribution, nrow = 2) +
scale_y_continuous(expand = c(.02, .02), breaks = seq(-1, 3, by = .5)) +
scale_color_identity() +
labs(x = "Area covered by AUD",
y = "Relative difference of predicted versus expected bat passes") +#,
#title = "How much do the actual numbers differ from predicted bat passes?",
#subtitle = "Relative difference of actual bat passes and monitored bat passes") +
theme(axis.text.x = element_text(size = 8))
## with variation
scenarios_cleaned %>%
filter(prop_monitored == .05) %>%
ggplot(aes(n, n_fatalities)) +
#geom_abline(slope = .01, intercept = 0, size = .6, color = "grey75") +
geom_jitter(alpha = .02, width = 20, height = 0) +
geom_quantile(quantiles = c(.25, .75), color = "dodgerblue3", size = .6) +
geom_quantile(quantiles = c(.5), color = "red", size = .9) +
facet_grid(~ distribution) +
scale_x_continuous(expand = c(.1, .1)) +
labs(x = "Number of bat passes (reality)",
y = "Number of fatalities") + #,
#title = "What really happens",
#subtitle = "Number of fatalities per number of bat passes (1% correlation + variation)") +
theme(panel.grid.major.y = element_blank(), axis.text.x = element_text(size = 10))
ggsave(here("plots", "fatalities", "fatalities_correlation_reality_var.pdf"), width = 13.5, height = 3.2, device = cairo_pdf)
## without variation
scenarios_cleaned %>%
filter(prop_monitored == .05) %>%
mutate(n_fatalities = round(n / 100)) %>%
ggplot(aes(n, n_fatalities)) +
#geom_abline(slope = .01, intercept = 0, size = .6, color = "grey75") +
geom_point(alpha = .02) +
geom_quantile(quantiles = c(0.25, 0.75), color = "dodgerblue3", size = .6) +
geom_quantile(quantiles = c(0.5), color = "red", size = .9) +
facet_grid(~ distribution) +
scale_x_continuous(expand = c(.1, .1)) +
labs(x = "Number of bat passes (reality)",
y = "Number of fatalities") + #,
#title = "What really happens",
#subtitle = "Number of fatalities per number of bat passes (1% correlation without variation)") +
theme(panel.grid.major.y = element_blank(), axis.text.x = element_text(size = 10))
## with variation
## fixed x-scale
scenarios_cleaned %>%
ggplot(aes(n_monitored, n_fatalities)) +
#geom_abline(aes(slope = .01*prop_monitored, intercept = 0), linetype = "13", size = .6, color = "grey75") +
geom_point(alpha = .02) +
geom_quantile(quantiles = c(0.25, 0.75), color = "dodgerblue3", size = .6) +
geom_quantile(quantiles = c(0.5), color = "red", size = .9) +
facet_grid(prop_monitored_lab ~ distribution) +
scale_x_continuous(limits = c(0, 550), breaks = c(1, 1:5*100)) +
scale_y_continuous(sec.axis = dup_axis(name = "Proportion covered by AUD", breaks = NULL, labels = NULL)) +
labs(x = "Number of bat passes (monitored)",
y = "Number of fatalities") + #,
#title = "What is detected",
#subtitle = "Number of fatalities per number of monitored bat passes (1% correlation + variation)") +
theme(panel.grid.major.y = element_blank(), panel.spacing.y = unit(.65, "lines"), axis.text = element_text(size = 10),
axis.title.y.right = element_text(size = 16, color = "grey60", face = "bold", margin = margin(l = 12)))
ggsave(here("plots", "fatalities", "fatalities_correlation_monitored_var.pdf"), width = 16, height = 11.5, device = cairo_pdf)
## with variation
## free-ranging x-axis
scenarios_cleaned %>%
ggplot(aes(n_monitored, n_fatalities)) +
#geom_abline(slope = .01, intercept = 0, linetype = "13", size = .6, color = "grey75") +
geom_point(alpha = .02) +
geom_quantile(quantiles = c(0.25, 0.75), color = "dodgerblue3", size = .6) +
geom_quantile(quantiles = c(0.5), color = "red", size = .9) +
facet_grid(prop_monitored_lab ~ distribution, scales = "free_x") +
#scale_x_continuous(breaks = c(1, 1:5*100)) +
scale_y_continuous(sec.axis = dup_axis(name = "Proportion covered by AUD", breaks = NULL, labels = NULL)) +
labs(x = "Number of bat passes (monitored)",
y = "Number of fatalities") +#,
#title = "What is detected",
#subtitle = "Number of fatalities per number of monitored bat passes (1% correlation + variation); note the different x axis ranges") +
theme(panel.grid.major.y = element_blank(), panel.spacing = unit(.65, "lines"), axis.text = element_text(size = 10),
axis.title.y.right = element_text(size = 16, color = "grey60", face = "bold", margin = margin(l = 12)))
ggsave(here("plots", "fatalities", "fatalities_correlation_monitored_var_free.pdf"), width = 16, height = 11.5, device = cairo_pdf)
## without variation
scenarios_cleaned %>%
mutate(n_fatalities = round(n / 100)) %>%
ggplot(aes(n_monitored, n_fatalities)) +
#geom_abline(slope = .01, intercept = 0, linetype = "13", size = .6, color = "grey75") +
geom_point(alpha = .02) +
geom_quantile(quantiles = c(0.25, 0.75), color = "dodgerblue3", size = .6) +
geom_quantile(quantiles = c(0.5), color = "red", size = .9) +
facet_grid(prop_monitored_lab ~ distribution) +
scale_x_continuous(limits = c(0, 550), breaks = c(1, 1:5*100)) +
scale_y_continuous(sec.axis = dup_axis(name = "Proportion covered by AUD", breaks = NULL, labels = NULL)) +
labs(x = "Number of bat passes (monitored)",
y = "Number of fatalities") +#,
#title = "What is detected",
#subtitle = "Number of fatalities per number of monitored bat passes (1% correlation without variation)") +
theme(panel.grid.major.y = element_blank(), panel.spacing.y = unit(.65, "lines"), axis.text = element_text(size = 10),
axis.title.y.right = element_text(size = 16, color = "grey60", face = "bold", margin = margin(l = 12)))
scenarios_fat_pred <-
scenarios_cleaned %>%
mutate(
n_fatalities_expected = n / 100,
n_fatalities_predicted = n_monitored / 100 ,
#n_fatalities_predicted = n_monitored * (prop_monitored*100) / 100 ,
diff_fatalities = n_fatalities_expected - n_fatalities_predicted
)
scenarios_fat_pred %>%
ggplot(aes(n_fatalities_expected, n_fatalities_predicted)) +
geom_hline(yintercept = 0, size = .6, color = "grey75") +
geom_abline(slope = 1, intercept = 0, linetype = "13", size = .6, color = "grey75") +
geom_point(shape = 20, alpha = .0025, size = 2) +
geom_quantile(quantiles = c(0.25, 0.75), color = "dodgerblue3", size = .6) +
geom_quantile(quantiles = c(0.5), color = "red", size = .9) +
facet_grid(prop_monitored ~ distribution) +
labs(x = "Number of fatalities (expected, as 1% of all passes)",
y = "Number of fatalities (predicted, as 1% of passes detected by acoustic monitoring)",
title = "How good is the predicted fatality risk?",
subtitle = "Absolute difference between number of fatalities based on acoustic monitoring versus expected (1% correlation + variation)") +
theme(panel.grid.major.y = element_blank())
ggsave("./plots/scenarios_correlation_fatalities_exp_pred.pdf", width = 16, height = 16, device = cairo_pdf)
scenarios_fat_pred %>%
mutate(prop_monitored = as.factor(str_sub(prop_monitored, 2, nchar(prop_monitored)))) %>%
ggplot(aes(prop_monitored, diff_fatalities)) +
geom_boxplot(color = "grey60", size = .3, outlier.size = .4, outlier.alpha = .2, outlier.shape = 20) +
stat_summary(geom = "point", shape = 18, size = 3) +
facet_wrap(~distribution, nrow = 1) +
scale_y_continuous(expand = c(.015, .015)) +
labs(x = "Proportion covered by AUD",
y = "Absolute difference in fatalities\nexpected – predicted",
title = "How good is the predicted fatality risk?",
subtitle = "Absolute difference of actual bat fatalities and predicted fatalies based on monitoring") +
theme(axis.text.x = element_text(size = 8))
ggsave("./plots/scenarios_correlation_fatalities_difference_box.pdf", width = 18, height = 6, device = cairo_pdf)
scenarios_fat_pred %>%
mutate(prop_monitored = as.factor(str_sub(prop_monitored, 2, nchar(prop_monitored)))) %>%
ggplot(aes(prop_monitored, diff_fatalities / n_fatalities_expected)) +
geom_boxplot(color = "grey60", size = .3, outlier.size = .4, outlier.alpha = .2, outlier.shape = 20) +
stat_summary(geom = "point", shape = 18, size = 3) +
facet_wrap(~distribution, nrow = 1) +
scale_y_continuous(expand = c(.015, .015)) +
labs(x = "Proportion covered by AUD",
y = "Relative difference in fatalities\n(expected – predicted) / expected",
title = "How good is the predicted fatality risk?",
subtitle = "Relative difference of actual bat fatalities and predicted fatalies based on monitoring") +
theme(axis.text.x = element_text(size = 8))
ggsave("./plots/scenarios_correlation_fatalities_difference_prop_box.pdf", width = 18, height = 6, device = cairo_pdf)
scenarios_fat_pred %>%
mutate(prop_monitored = as.factor(str_sub(prop_monitored, 2, nchar(prop_monitored)))) %>%
ggplot(aes(prop_monitored, diff_fatalities)) +
geom_jitter(color = "grey90", height = 0, alpha = .1, width = .3, size = .4) +
geom_smooth(aes(group = 1), color = "grey40") +
stat_summary(geom = "point", shape = 18, size = 3) +
facet_wrap(~distribution, nrow = 1) +
scale_y_continuous(expand = c(.008, .008)) +
labs(x = "Proportion covered by AUD",
y = "Absolute difference in fatalities\nexpected – predicted",
title = "How good is the predicted fatality risk?") +
theme(axis.text.x = element_text(size = 8))
ggsave("./plots/scenarios_correlation_fatalities_difference_smooth_jitter.pdf", width = 18, height = 6, device = cairo_pdf)
scenarios_fat_pred %>%
mutate(prop_monitored = as.factor(str_sub(prop_monitored, 2, nchar(prop_monitored)))) %>%
ggplot(aes(prop_monitored, diff_fatalities)) +
ggbeeswarm::geom_quasirandom(color = "grey90", alpha = .5, width = .3, size = .4) +
geom_smooth(aes(group = 1), color = "grey40") +
stat_summary(geom = "point", shape = 18, size = 3) +
facet_wrap(~distribution, nrow = 1) +
scale_y_continuous(expand = c(.005, .005)) +
labs(x = "Proportion covered by AUD",
y = "Absolute difference in fatalities\nexpected – predicted",
title = "How good is the predicted fatality risk?") +
theme(axis.text.x = element_text(size = 8))
ggsave("./plots/scenarios_correlation_fatalities_difference_smooth_bee.pdf", width = 18, height = 6, device = cairo_pdf)
scenarios_fat_pred %>%
mutate(prop_monitored = as.factor(str_sub(prop_monitored, 2, nchar(prop_monitored)))) %>%
ggplot(aes(prop_monitored, diff_fatalities / n_fatalities_expected)) +
ggbeeswarm::geom_quasirandom(color = "grey90", alpha = .5, width = .3, size = .4) +
geom_smooth(aes(group = 1), color = "grey40") +
stat_summary(geom = "point", shape = 18, size = 3) +
facet_wrap(~distribution, nrow = 1) +
scale_y_continuous(expand = c(.005, .005)) +
labs(x = "Proportion covered by AUD",
y = "Relative difference in fatalities\n(expected – predicted) / expected",
title = "How good is the predicted fatality risk?",
subtitle = "Relative difference of actual bat fatalities and predicted fatalies based on monitoring") +
theme(axis.text.x = element_text(size = 8))
ggsave("./plots/scenarios_correlation_fatalities_difference_prop_smooth_bee.pdf", width = 18, height = 6, device = cairo_pdf)
Sys.time()
[1] "2022-01-17 16:23:47 CET"
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)
Matrix products: default
locale:
[1] LC_COLLATE=German_Germany.1252 LC_CTYPE=German_Germany.1252
[3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C
[5] LC_TIME=German_Germany.1252
system code page: 65001
attached base packages:
[1] stats graphics grDevices utils datasets methods
[7] base
other attached packages:
[1] here_1.0.1 forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
[5] purrr_0.3.4 readr_2.0.2 tidyr_1.1.4 tibble_3.1.6
[9] ggplot2_3.3.5 tidyverse_1.3.1
loaded via a namespace (and not attached):
[1] colorspace_2.0-2 ellipsis_0.3.2 class_7.3-19
[4] rprojroot_2.0.2 fs_1.5.0 rstudioapi_0.13
[7] listenv_0.8.0 farver_2.1.0 MatrixModels_0.5-0
[10] scico_1.3.0 prodlim_2019.11.13 fansi_0.5.0
[13] lubridate_1.8.0 xml2_1.3.2 codetools_0.2-18
[16] splines_4.1.2 downlit_0.4.0 cachem_1.0.6
[19] knitr_1.36 jsonlite_1.7.2 pROC_1.18.0
[22] caret_6.0-90 broom_0.7.11 dbplyr_2.1.1
[25] compiler_4.1.2 httr_1.4.2 backports_1.2.1
[28] assertthat_0.2.1 Matrix_1.3-4 fastmap_1.1.0
[31] cli_3.1.0 htmltools_0.5.2 quantreg_5.86
[34] tools_4.1.2 gtable_0.3.0 glue_1.4.2
[37] reshape2_1.4.4 Rcpp_1.0.7 cellranger_1.1.0
[40] jquerylib_0.1.4 vctrs_0.3.8 pdftools_3.0.1
[43] nlme_3.1-153 conquer_1.2.1 iterators_1.0.13
[46] timeDate_3043.102 xfun_0.27 gower_0.2.2
[49] globals_0.14.0 rvest_1.0.2 lifecycle_1.0.1
[52] future_1.22.1 MASS_7.3-54 scales_1.1.1
[55] ipred_0.9-12 ragg_1.1.3 hms_1.1.1
[58] parallel_4.1.2 SparseM_1.81 yaml_2.2.1
[61] memoise_2.0.1 sass_0.4.0 distill_1.3
[64] rpart_4.1-15 stringi_1.7.5 highr_0.9
[67] foreach_1.5.1 lava_1.6.10 matrixStats_0.61.0
[70] rlang_0.4.12 pkgconfig_2.0.3 systemfonts_1.0.3
[73] evaluate_0.14 lattice_0.20-45 recipes_0.1.17
[76] labeling_0.4.2 tidyselect_1.1.1 parallelly_1.29.0
[79] plyr_1.8.6 magrittr_2.0.1 R6_2.5.1
[82] generics_0.1.1 DBI_1.1.2 pillar_1.6.4
[85] haven_2.4.3 withr_2.4.3 survival_3.2-13
[88] nnet_7.3-16 future.apply_1.8.1 modelr_0.1.8
[91] crayon_1.4.2 utf8_1.2.2 tzdb_0.1.2
[94] rmarkdown_2.11 grid_4.1.2 readxl_1.3.1
[97] data.table_1.14.2 qpdf_1.1 ModelMetrics_1.2.2.2
[100] reprex_2.0.1 digest_0.6.29 textshaping_0.3.6
[103] stats4_4.1.2 munsell_0.5.0 bslib_0.3.1
[106] askpass_1.1